{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "A100"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "429b36b0c878410093e860f3aa05d222": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e101befba7e74e899602398db757bf11",
              "IPY_MODEL_e89271fe9adc4cbcb845320ad152dc6b",
              "IPY_MODEL_8806af4922a3452c8b184f962f96fa57"
            ],
            "layout": "IPY_MODEL_b9db260d250043acb1f113e621c6d9f0"
          }
        },
        "e101befba7e74e899602398db757bf11": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_529ef7cf4d0b4cff80fcca6e8033122b",
            "placeholder": "​",
            "style": "IPY_MODEL_05a40d2b54714b5f9a99245f53021436",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "e89271fe9adc4cbcb845320ad152dc6b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4cba1033a6c84685b6376b7a8a6d462d",
            "max": 2,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_6d93d773f7bd43a6b069753a0a75a04d",
            "value": 2
          }
        },
        "8806af4922a3452c8b184f962f96fa57": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_087a05b68a6a4db3a6e12196f7d8783e",
            "placeholder": "​",
            "style": "IPY_MODEL_312352aa240b4934916690f117efa2fa",
            "value": " 2/2 [00:03&lt;00:00,  1.63s/it]"
          }
        },
        "b9db260d250043acb1f113e621c6d9f0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "529ef7cf4d0b4cff80fcca6e8033122b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "05a40d2b54714b5f9a99245f53021436": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "4cba1033a6c84685b6376b7a8a6d462d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6d93d773f7bd43a6b069753a0a75a04d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "087a05b68a6a4db3a6e12196f7d8783e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "312352aa240b4934916690f117efa2fa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_bJ0vKOP_S4X",
        "outputId": "9e7f8d2d-037a-4521-a0df-3530dc272913"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "from google.colab import drive\n",
        "\n",
        "# Mount Google Drive into the Colab filesystem (this import only exists on Colab).\n",
        "DRIVE_MOUNT_POINT = '/content/drive'\n",
        "drive.mount(DRIVE_MOUNT_POINT)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# 如何微调 DeepSeek-Coder\n",
        "\n",
        "我们为用户提供脚本 `finetune_deepseekcoder.py`，以便在下游任务中微调我们的模型。\n",
        "该脚本支持使用 DeepSpeed 进行训练。您需要通过以下方式安装所需的软件包：\n",
        "`pip install -r requirements.txt`\n",
        "请按照样本数据集格式准备您的训练数据。每一行都是一个 JSON 序列化字符串，包含两个必需的字段：`instruction`（指令）和 `output`（输出）。\n",
        "准备好数据后，可以使用示例 shell 脚本对 `deepseek-ai/deepseek-coder-6.7b-instruct` 进行微调。\n",
        "请记住指定 `DATA_PATH`、`OUTPUT_PATH`。请根据您的场景选择适当的超参数（例如，`learning_rate`、`per_device_train_batch_size`）。"
      ],
      "metadata": {
        "id": "x3b2Ks_qcXD1"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Clone the DeepSeek-Coder repository to the fixed path the later cells refer to.\n",
        "# The directory check makes the cell safe to re-run; delete the directory to force a fresh clone.\n",
        "!test -d /content/DeepSeek-Coder || git clone https://github.com/deepseek-ai/DeepSeek-Coder.git /content/DeepSeek-Coder"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3a8rAzyR_0C6",
        "outputId": "452bef5e-c12f-407d-bd80-17c62f13e89d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'DeepSeek-Coder'...\n",
            "remote: Enumerating objects: 482, done.\u001b[K\n",
            "remote: Counting objects: 100% (204/204), done.\u001b[K\n",
            "remote: Compressing objects: 100% (107/107), done.\u001b[K\n",
            "remote: Total 482 (delta 154), reused 124 (delta 97), pack-reused 278\u001b[K\n",
            "Receiving objects: 100% (482/482), 12.49 MiB | 16.52 MiB/s, done.\n",
            "Resolving deltas: 100% (213/213), done.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Clone the model repository to the fixed path used as model_path later on.\n",
        "# The directory check makes the cell safe to re-run; delete the directory to force a fresh clone\n",
        "# (for example after an interrupted download).\n",
        "!test -d /content/deepseek-coder-6.7b-instruct || git clone https://www.modelscope.cn/deepseek-ai/deepseek-coder-6.7b-instruct.git /content/deepseek-coder-6.7b-instruct"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "HwDtqiQDHQEs",
        "outputId": "be0b785f-6f14-4fa2-c0c0-d029dce7f1d6"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'deepseek-coder-6.7b-instruct'...\n",
            "remote: Enumerating objects: 23, done.\u001b[K\n",
            "remote: Counting objects: 100% (23/23), done.\u001b[K\n",
            "remote: Compressing objects: 100% (20/20), done.\u001b[K\n",
            "remote: Total 23 (delta 4), reused 0 (delta 0), pack-reused 0\u001b[K\n",
            "Receiving objects: 100% (23/23), 393.54 KiB | 1.55 MiB/s, done.\n",
            "Resolving deltas: 100% (4/4), done.\n",
            "Filtering content: 100% (3/3), 4.55 GiB | 14.26 MiB/s, done.\n",
            "Encountered 1 file(s) that may not have been copied correctly on Windows:\n",
            "\tpytorch_model-00001-of-00002.bin\n",
            "\n",
            "See: `git lfs help smudge` for more details.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install -r /content/DeepSeek-Coder/requirements.txt"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "qhcW9oOv_4Te",
        "outputId": "090e12a4-b273-4df6-a82e-f1a6278c281d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: torch>=2.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/requirements.txt (line 1)) (2.1.0+cu121)\n",
            "Requirement already satisfied: tokenizers>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/requirements.txt (line 2)) (0.15.2)\n",
            "Requirement already satisfied: transformers>=4.35.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/requirements.txt (line 3)) (4.38.2)\n",
            "Collecting accelerate (from -r /content/DeepSeek-Coder/requirements.txt (line 4))\n",
            "  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)\n",
            "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/280.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K     \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m \u001b[32m276.5/280.0 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m280.0/280.0 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: sympy==1.12 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/requirements.txt (line 5)) (1.12)\n",
            "Collecting pebble (from -r /content/DeepSeek-Coder/requirements.txt (line 6))\n",
            "  Downloading Pebble-5.0.6-py3-none-any.whl (30 kB)\n",
            "Collecting timeout-decorator (from -r /content/DeepSeek-Coder/requirements.txt (line 7))\n",
            "  Downloading timeout-decorator-0.5.0.tar.gz (4.8 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting attrdict (from -r /content/DeepSeek-Coder/requirements.txt (line 8))\n",
            "  Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy==1.12->-r /content/DeepSeek-Coder/requirements.txt (line 5)) (1.3.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (3.13.1)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (4.10.0)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (3.2.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (3.1.3)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (2023.6.0)\n",
            "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (2.1.0)\n",
            "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.14.0->-r /content/DeepSeek-Coder/requirements.txt (line 2)) (0.20.3)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (1.25.2)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (23.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (6.0.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (2023.12.25)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (2.31.0)\n",
            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (0.4.2)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (4.66.2)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->-r /content/DeepSeek-Coder/requirements.txt (line 4)) (5.9.5)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from attrdict->-r /content/DeepSeek-Coder/requirements.txt (line 8)) (1.16.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2.0->-r /content/DeepSeek-Coder/requirements.txt (line 1)) (2.1.5)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/requirements.txt (line 3)) (2024.2.2)\n",
            "Building wheels for collected packages: timeout-decorator\n",
            "  Building wheel for timeout-decorator (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for timeout-decorator: filename=timeout_decorator-0.5.0-py3-none-any.whl size=5004 sha256=b1608c680ac82e5dba9a7d838d2398e57cde69a151d3249065c5e9d320567711\n",
            "  Stored in directory: /root/.cache/pip/wheels/68/2f/bc/76f1192d474666d41ae6f09813fccbd00fe3f07e8261c4cff5\n",
            "Successfully built timeout-decorator\n",
            "Installing collected packages: timeout-decorator, pebble, attrdict, accelerate\n",
            "Successfully installed accelerate-0.27.2 attrdict-2.0.1 pebble-5.0.6 timeout-decorator-0.5.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
        "import torch\n",
        "\n",
        "# Path of the locally cloned model repository (weights + tokenizer files).\n",
        "model_path = '/content/deepseek-coder-6.7b-instruct'\n",
        "\n",
        "# Budget for generated tokens only; unlike max_length it does not count the prompt,\n",
        "# so the completion is not cut off after a few lines.\n",
        "MAX_NEW_TOKENS = 256\n",
        "\n",
        "# Run on the GPU when one is attached; otherwise keep the float32 CPU behaviour.\n",
        "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
        "dtype = torch.bfloat16 if device == 'cuda' else torch.float32\n",
        "\n",
        "# Load the tokenizer and the pretrained model.\n",
        "# NOTE(review): on GPU the model stays resident after this cell; free it\n",
        "# (del model; torch.cuda.empty_cache()) before starting a training job on the same GPU.\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
        "model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype).to(device)\n",
        "\n",
        "# Prompt text.\n",
        "text = \"#write a quick sort algorithm\"\n",
        "\n",
        "# Tokenize the prompt. Calling the tokenizer directly (instead of .encode) also\n",
        "# returns the attention mask, which generate() asks for to give reliable results.\n",
        "inputs = tokenizer(text, return_tensors='pt').to(device)\n",
        "input_ids = inputs['input_ids']\n",
        "\n",
        "# Generate without tracking gradients. pad_token_id is set explicitly to the EOS id,\n",
        "# which is what generate() otherwise falls back to with a warning.\n",
        "with torch.no_grad():\n",
        "    outputs = model.generate(\n",
        "        input_ids,\n",
        "        attention_mask=inputs['attention_mask'],\n",
        "        max_new_tokens=MAX_NEW_TOKENS,\n",
        "        pad_token_id=tokenizer.eos_token_id,\n",
        "    )\n",
        "\n",
        "# Decode the generated token ids back into text.\n",
        "predicted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
        "print(predicted_text)\n"
      ],
      "metadata": {
        "id": "hfwFuEl_ALmI",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 307,
          "referenced_widgets": [
            "429b36b0c878410093e860f3aa05d222",
            "e101befba7e74e899602398db757bf11",
            "e89271fe9adc4cbcb845320ad152dc6b",
            "8806af4922a3452c8b184f962f96fa57",
            "b9db260d250043acb1f113e621c6d9f0",
            "529ef7cf4d0b4cff80fcca6e8033122b",
            "05a40d2b54714b5f9a99245f53021436",
            "4cba1033a6c84685b6376b7a8a6d462d",
            "6d93d773f7bd43a6b069753a0a75a04d",
            "087a05b68a6a4db3a6e12196f7d8783e",
            "312352aa240b4934916690f117efa2fa"
          ]
        },
        "outputId": "b8707493-af22-43e8-e673-dd06183b69ce"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "429b36b0c878410093e860f3aa05d222"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
            "  return self.fget.__get__(instance, owner)()\n",
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:32014 for open-end generation.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "#write a quick sort algorithm in python\n",
            "\n",
            "\n",
            "def quick_sort(arr):\n",
            "    if len(arr) <= 1:\n",
            "        return arr\n",
            "    else:\n",
            "        pivot = arr[0]\n",
            "        less =\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Switch the working directory to the home directory.\n",
        "# The explicit %cd magic does not depend on IPython's automagic setting and cannot be\n",
        "# shadowed by a Python variable named cd.\n",
        "%cd ~"
      ],
      "metadata": {
        "id": "WeM8Iw4GA3Xc",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "25903dee-eeb3-4c8f-ac1a-c6e53de61411"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/root\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install -r /content/DeepSeek-Coder/finetune/requirements.txt"
      ],
      "metadata": {
        "id": "K72A025tAI6R",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1056b221-3799-4902-c974-3c7190f0e552"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: torch>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (2.1.0+cu121)\n",
            "Requirement already satisfied: tokenizers>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 2)) (0.15.2)\n",
            "Requirement already satisfied: transformers>=4.35.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (4.38.2)\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 4)) (0.27.2)\n",
            "Requirement already satisfied: attrdict in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 5)) (2.0.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 6)) (4.66.2)\n",
            "Collecting deepspeed (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 8))\n",
            "  Downloading deepspeed-0.13.5.tar.gz (1.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting datasets (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 9))\n",
            "  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting tensorboardX (from -r /content/DeepSeek-Coder/finetune/requirements.txt (line 10))\n",
            "  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (3.13.1)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (4.10.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (3.2.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (3.1.3)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (2023.6.0)\n",
            "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (2.1.0)\n",
            "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.14.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 2)) (0.20.3)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (1.25.2)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (23.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (6.0.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (2023.12.25)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (2.31.0)\n",
            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (0.4.2)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 4)) (5.9.5)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from attrdict->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 5)) (1.16.0)\n",
            "Collecting hjson (from deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8))\n",
            "  Downloading hjson-3.1.0-py3-none-any.whl (54 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting ninja (from deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8))\n",
            "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8)) (9.0.0)\n",
            "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8)) (2.6.3)\n",
            "Collecting pynvml (from deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8))\n",
            "  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (14.0.2)\n",
            "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (0.6)\n",
            "Collecting dill<0.3.9,>=0.3.0 (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9))\n",
            "  Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (1.5.3)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (3.4.1)\n",
            "Collecting multiprocess (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9))\n",
            "  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m22.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (3.9.3)\n",
            "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.10/dist-packages (from tensorboardX->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 10)) (3.20.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (23.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (1.4.1)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (6.0.5)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (1.9.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (4.0.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.35.0->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 3)) (2024.2.2)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (2.1.5)\n",
            "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 9)) (2023.4)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8)) (0.6.0)\n",
            "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 8)) (2.16.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=2.0.1->-r /content/DeepSeek-Coder/finetune/requirements.txt (line 1)) (1.3.0)\n",
            "Building wheels for collected packages: deepspeed\n",
            "  Building wheel for deepspeed (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for deepspeed: filename=deepspeed-0.13.5-py3-none-any.whl size=1370576 sha256=2b3b2cd7d2a6bbdfb42325010eff79a720709de4523d78b4fc975224564c829b\n",
            "  Stored in directory: /root/.cache/pip/wheels/b2/0a/43/6e1f12bbb7f5cb2762aba38dc98225956ae6a4c37bf900925a\n",
            "Successfully built deepspeed\n",
            "Installing collected packages: ninja, hjson, tensorboardX, pynvml, dill, multiprocess, deepspeed, datasets\n",
            "Successfully installed datasets-2.18.0 deepspeed-0.13.5 dill-0.3.8 hjson-3.1.0 multiprocess-0.70.16 ninja-1.11.1.1 pynvml-11.5.0 tensorboardX-2.6.2.2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "DATA_PATH=\"/content/drive/MyDrive/EvolInstruct-Code-80k.json\"\n",
        "OUTPUT_PATH=\"/content/drive/MyDrive/output\"\n",
        "MODEL_PATH=\"/content/deepseek-coder-6.7b-instruct\""
      ],
      "metadata": {
        "id": "rMqYTCT5wenB"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!deepspeed /content/DeepSeek-Coder/finetune/finetune_deepseekcoder.py \\\n",
        "    --model_name_or_path $MODEL_PATH \\\n",
        "    --data_path $DATA_PATH \\\n",
        "    --output_dir $OUTPUT_PATH \\\n",
        "    --num_train_epochs 1 \\\n",
        "    --model_max_length 1024 \\\n",
        "    --per_device_train_batch_size 1 \\\n",
        "    --per_device_eval_batch_size 1 \\\n",
        "    --gradient_accumulation_steps 2 \\\n",
        "    --evaluation_strategy \"no\" \\\n",
        "    --save_strategy \"epoch\" \\\n",
        "    --save_steps 100 \\\n",
        "    --save_total_limit 2 \\\n",
        "    --learning_rate 1e-5 \\\n",
        "    --warmup_steps 10 \\\n",
        "    --logging_steps 100 \\\n",
        "    --lr_scheduler_type \"cosine\" \\\n",
        "    --gradient_checkpointing True \\\n",
        "    --report_to \"tensorboard\" \\\n",
        "    --deepspeed /content/DeepSeek-Coder/finetune/configs/ds_config_zero3.json \\\n",
        "    --fp16 True"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "MFOZqoGvyLSh",
        "outputId": "24d4e369-7c66-4aca-8396-571d11a907af"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[2024-03-07 03:22:26,195] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
            "[2024-03-07 03:22:28,916] [WARNING] [runner.py:202:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.\n",
            "[2024-03-07 03:22:28,916] [INFO] [runner.py:568:main] cmd = /usr/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None /content/DeepSeek-Coder/finetune/finetune_deepseekcoder.py --model_name_or_path /content/deepseek-coder-6.7b-instruct --data_path /content/drive/MyDrive/EvolInstruct-Code-80k.json --output_dir /content/drive/MyDrive/output --num_train_epochs 1 --model_max_length 1024 --per_device_train_batch_size 1 --per_device_eval_batch_size 1 --gradient_accumulation_steps 2 --evaluation_strategy no --save_strategy epoch --save_steps 100 --save_total_limit 2 --learning_rate 1e-5 --warmup_steps 10 --logging_steps 100 --lr_scheduler_type cosine --gradient_checkpointing True --report_to tensorboard --deepspeed /content/DeepSeek-Coder/finetune/configs/ds_config_zero3.json --fp16 True\n",
            "[2024-03-07 03:22:32,528] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
            "[2024-03-07 03:22:33,793] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.19.3-1+cuda12.2\n",
            "[2024-03-07 03:22:33,793] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE_VERSION=2.19.3-1\n",
            "[2024-03-07 03:22:33,793] [INFO] [launch.py:138:main] 0 NCCL_VERSION=2.19.3-1\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE=libnccl2=2.19.3-1+cuda12.2\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE_NAME=libnccl2\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE_VERSION=2.19.3-1\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:145:main] WORLD INFO DICT: {'localhost': [0]}\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:151:main] nnodes=1, num_local_procs=1, node_rank=0\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:162:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0]})\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:163:main] dist_world_size=1\n",
            "[2024-03-07 03:22:33,794] [INFO] [launch.py:165:main] Setting CUDA_VISIBLE_DEVICES=0\n",
            "[2024-03-07 03:22:33,795] [INFO] [launch.py:253:main] process 18386 spawned with command: ['/usr/bin/python3', '-u', '/content/DeepSeek-Coder/finetune/finetune_deepseekcoder.py', '--local_rank=0', '--model_name_or_path', '/content/deepseek-coder-6.7b-instruct', '--data_path', '/content/drive/MyDrive/EvolInstruct-Code-80k.json', '--output_dir', '/content/drive/MyDrive/output', '--num_train_epochs', '1', '--model_max_length', '1024', '--per_device_train_batch_size', '1', '--per_device_eval_batch_size', '1', '--gradient_accumulation_steps', '2', '--evaluation_strategy', 'no', '--save_strategy', 'epoch', '--save_steps', '100', '--save_total_limit', '2', '--learning_rate', '1e-5', '--warmup_steps', '10', '--logging_steps', '100', '--lr_scheduler_type', 'cosine', '--gradient_checkpointing', 'True', '--report_to', 'tensorboard', '--deepspeed', '/content/DeepSeek-Coder/finetune/configs/ds_config_zero3.json', '--fp16', 'True']\n",
            "2024-03-07 03:22:39.259117: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "2024-03-07 03:22:39.259219: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "2024-03-07 03:22:39.372329: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "2024-03-07 03:22:41.752693: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
            "[2024-03-07 03:22:44,589] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
            "[2024-03-07 03:22:45,092] [INFO] [comm.py:637:init_distributed] cdb=None\n",
            "[2024-03-07 03:22:45,092] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n",
            "====================================================================================================\n",
            "TrainingArguments(\n",
            "_n_gpu=1,\n",
            "accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True},\n",
            "adafactor=False,\n",
            "adam_beta1=0.9,\n",
            "adam_beta2=0.999,\n",
            "adam_epsilon=1e-08,\n",
            "auto_find_batch_size=False,\n",
            "bf16=False,\n",
            "bf16_full_eval=False,\n",
            "cache_dir=None,\n",
            "data_seed=None,\n",
            "dataloader_drop_last=False,\n",
            "dataloader_num_workers=0,\n",
            "dataloader_persistent_workers=False,\n",
            "dataloader_pin_memory=True,\n",
            "dataloader_prefetch_factor=None,\n",
            "ddp_backend=None,\n",
            "ddp_broadcast_buffers=None,\n",
            "ddp_bucket_cap_mb=None,\n",
            "ddp_find_unused_parameters=None,\n",
            "ddp_timeout=1800,\n",
            "debug=[],\n",
            "deepspeed=/content/DeepSeek-Coder/finetune/configs/ds_config_zero3.json,\n",
            "disable_tqdm=False,\n",
            "dispatch_batches=None,\n",
            "do_eval=False,\n",
            "do_predict=False,\n",
            "do_train=False,\n",
            "eval_accumulation_steps=None,\n",
            "eval_delay=0,\n",
            "eval_steps=None,\n",
            "evaluation_strategy=no,\n",
            "fp16=True,\n",
            "fp16_backend=auto,\n",
            "fp16_full_eval=False,\n",
            "fp16_opt_level=O1,\n",
            "fsdp=[],\n",
            "fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\n",
            "fsdp_min_num_params=0,\n",
            "fsdp_transformer_layer_cls_to_wrap=None,\n",
            "full_determinism=False,\n",
            "gradient_accumulation_steps=2,\n",
            "gradient_checkpointing=True,\n",
            "gradient_checkpointing_kwargs=None,\n",
            "greater_is_better=None,\n",
            "group_by_length=False,\n",
            "half_precision_backend=auto,\n",
            "hub_always_push=False,\n",
            "hub_model_id=None,\n",
            "hub_private_repo=False,\n",
            "hub_strategy=every_save,\n",
            "hub_token=<HUB_TOKEN>,\n",
            "ignore_data_skip=False,\n",
            "include_inputs_for_metrics=False,\n",
            "include_num_input_tokens_seen=False,\n",
            "include_tokens_per_second=False,\n",
            "jit_mode_eval=False,\n",
            "label_names=None,\n",
            "label_smoothing_factor=0.0,\n",
            "learning_rate=1e-05,\n",
            "length_column_name=length,\n",
            "load_best_model_at_end=False,\n",
            "local_rank=0,\n",
            "log_level=passive,\n",
            "log_level_replica=warning,\n",
            "log_on_each_node=True,\n",
            "logging_dir=/content/drive/MyDrive/output/runs/Mar07_03-22-43_5b0e7e44665f,\n",
            "logging_first_step=False,\n",
            "logging_nan_inf_filter=True,\n",
            "logging_steps=100,\n",
            "logging_strategy=steps,\n",
            "lr_scheduler_kwargs={},\n",
            "lr_scheduler_type=cosine,\n",
            "max_grad_norm=1.0,\n",
            "max_steps=-1,\n",
            "metric_for_best_model=None,\n",
            "model_max_length=1024,\n",
            "mp_parameters=,\n",
            "neftune_noise_alpha=None,\n",
            "no_cuda=False,\n",
            "num_train_epochs=1.0,\n",
            "optim=adamw_torch,\n",
            "optim_args=None,\n",
            "output_dir=/content/drive/MyDrive/output,\n",
            "overwrite_output_dir=False,\n",
            "past_index=-1,\n",
            "per_device_eval_batch_size=1,\n",
            "per_device_train_batch_size=1,\n",
            "prediction_loss_only=False,\n",
            "push_to_hub=False,\n",
            "push_to_hub_model_id=None,\n",
            "push_to_hub_organization=None,\n",
            "push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
            "ray_scope=last,\n",
            "remove_unused_columns=True,\n",
            "report_to=['tensorboard'],\n",
            "resume_from_checkpoint=None,\n",
            "run_name=/content/drive/MyDrive/output,\n",
            "save_on_each_node=False,\n",
            "save_only_model=False,\n",
            "save_safetensors=True,\n",
            "save_steps=100.0,\n",
            "save_strategy=epoch,\n",
            "save_total_limit=2,\n",
            "seed=42,\n",
            "skip_memory_metrics=True,\n",
            "split_batches=None,\n",
            "tf32=None,\n",
            "torch_compile=False,\n",
            "torch_compile_backend=None,\n",
            "torch_compile_mode=None,\n",
            "torchdynamo=None,\n",
            "tpu_metrics_debug=False,\n",
            "tpu_num_cores=None,\n",
            "use_cpu=False,\n",
            "use_ipex=False,\n",
            "use_legacy_prediction_loop=False,\n",
            "use_mps_device=False,\n",
            "warmup_ratio=0.0,\n",
            "warmup_steps=10,\n",
            "weight_decay=0.0,\n",
            ")\n",
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
            "PAD Token: <｜end▁of▁sentence｜> 32014\n",
            "BOS Token <｜begin▁of▁sentence｜> 32013\n",
            "EOS Token <｜end▁of▁sentence｜> 32014\n",
            "Load tokenizer from /content/deepseek-coder-6.7b-instruct over.\n",
            "[2024-03-07 03:23:08,307] [INFO] [partition_parameters.py:343:__exit__] finished initializing model - num_params = 291, num_elems = 6.74B\n",
            "Loading checkpoint shards:   0% 0/2 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
            "  return self.fget.__get__(instance, owner)()\n",
            "Loading checkpoint shards: 100% 2/2 [01:01<00:00, 30.87s/it]\n",
            "Load model from /content/deepseek-coder-6.7b-instruct over.\n",
            "Training dataset samples: 78264\n",
            "Sample 57773 of the training set: [32013, 2042, 417, 274, 20926, 14244, 20391, 11, 26696, 254, 20676, 8041, 74, 339, 8589, 2008, 11, 6908, 457, 20676, 8041, 74, 7958, 11, 285, 340, 885, 3495, 4301, 4512, 276, 4531, 8214, 13, 1487, 4636, 2223, 13143, 4301, 11, 5411, 285, 13936, 4447, 11, 285, 746, 2159, 12, 13517, 250, 8214, 4301, 11, 340, 540, 20857, 276, 3495, 13, 185, 13518, 3649, 3475, 25, 185, 7256, 245, 2031, 279, 9840, 344, 4486, 274, 3857, 365, 979, 4856, 9972, 5744, 372, 2773, 285, 25061, 254, 1228, 280, 17876, 254, 1019, 1594, 276, 254, 1813, 280, 254, 1856, 1594, 13, 428, 2031, 1020, 6429, 3851, 1064, 254, 2773, 3857, 1214, 441, 3768, 979, 4856, 9972, 5744, 285, 3786, 274, 2179, 3953, 279, 1109, 3851, 13, 185, 185, 3511, 11471, 11, 254, 2031, 1020, 2332, 562, 254, 2773, 5744, 417, 2372, 245, 7122, 3160, 13, 428, 3160, 1020, 330, 4212, 372, 254, 8402, 9972, 1594, 6612, 849, 254, 1019, 2773, 1594, 285, 254, 19702, 9972, 1594, 5565, 849, 254, 1856, 2773, 1594, 13, 1271, 254, 2773, 5744, 417, 441, 2372, 437, 3160, 11, 254, 2031, 1020, 3786, 274, 2179, 3953, 13, 185, 185, 1978, 2194, 11, 562, 254, 2773, 3857, 317, 821, 20, 11, 207, 17, 2705, 254, 2031, 1020, 11489, 207, 20, 8475, 276, 254, 1813, 280, 207, 17, 11, 585, 317, 207, 17, 20, 13, 3149, 11, 562, 254, 2773, 3857, 317, 821, 22, 11, 207, 18, 2705, 254, 2031, 1020, 3786, 274, 2179, 3953, 1952, 207, 22, 317, 441, 2372, 254, 3160, 280, 9972, 5744, 334, 17, 11, 207, 18, 11, 207, 20, 11, 207, 22, 628, 185, 185, 1889, 2332, 562, 245, 1594, 317, 9972, 11, 340, 1020, 4341, 245, 7039, 2040, 344, 7579, 1917, 562, 254, 1594, 317, 9972, 285, 2515, 6617, 13, 997, 2040, 1020, 931, 245, 686, 7522, 6713, 849, 3915, 13608, 2987, 262, 3446, 457, 519, 5744, 2236, 849, 359, 13, 185, 185, 24272, 11, 254, 2031, 1020, 931, 7472, 14326, 276, 6429, 683, 2188, 23222, 11, 1109, 372, 562, 254, 2773, 3857, 317, 2352, 409, 562, 254, 2773, 5744, 417, 441, 4856, 13, 680, 1109, 3851, 11, 274, 6854, 2179, 3953, 1020, 330, 
12315, 13, 185, 185, 6014, 25, 1255, 482, 6044, 344, 254, 2773, 5744, 540, 1948, 330, 26682, 13, 185, 13518, 21289, 25, 185, 4888, 317, 254, 9840, 2031, 344, 18957, 254, 7271, 25, 185, 185, 10252, 4660, 185, 1892, 5075, 13, 2303, 13, 1966, 20011, 26, 185, 185, 3566, 757, 20622, 20327, 507, 185, 185, 315, 1270, 3314, 2494, 1959, 7, 2005, 4807, 10936, 8, 507, 185, 436, 1097, 4807, 2773, 405, 507, 20, 11, 207, 17, 3871, 1069, 18531, 2773, 3857, 185, 436, 1675, 507, 185, 655, 1097, 1228, 405, 11489, 20327, 7, 3584, 477, 185, 655, 3639, 13, 406, 13, 12705, 1195, 4835, 25, 440, 945, 1228, 477, 185, 436, 611, 5765, 334, 26869, 10804, 28151, 300, 8, 507, 185, 655, 3639, 13, 406, 13, 12705, 7, 68, 13, 31426, 3705, 185, 436, 611, 185, 315, 611, 185, 185, 315, 1270, 3314, 1097, 11489, 20327, 7, 569, 4807, 2773, 8, 507, 185, 436, 562, 334, 3584, 2312, 2352, 1550, 2773, 13, 4082, 2069, 4084, 17, 8, 507, 185, 655, 5245, 756, 28625, 28151, 1195, 6546, 3857, 1526, 3768, 5082, 979, 5744, 2583, 185, 436, 611, 185, 185, 436, 1097, 3240, 405, 2773, 58, 15, 2818, 185, 436, 1097, 29663, 405, 2773, 58, 16, 2818, 185, 185, 436, 562, 5036, 262, 5959, 590, 7, 4907, 8, 1550, 2069, 262, 5959, 590, 7, 790, 4421, 1435, 507, 185, 655, 5245, 756, 28625, 28151, 1195, 6546, 3857, 1526, 3768, 979, 4856, 9972, 5744, 2583, 185, 436, 611, 185, 185, 436, 562, 5036, 262, 769, 11142, 7, 4907, 11, 29663, 1435, 507, 185, 655, 5245, 756, 28625, 28151, 1195, 6546, 5744, 417, 441, 2372, 254, 7122, 3160, 2583, 185, 436, 611, 185, 185, 436, 967, 334, 569, 8, 12404, 13, 17257, 7, 4907, 11, 29663, 477, 185, 315, 611, 185, 185, 315, 1270, 3314, 7248, 317, 5959, 590, 7, 569, 1594, 8, 507, 185, 436, 562, 334, 7675, 22322, 16, 8, 507, 185, 655, 967, 2515, 26, 185, 436, 611, 185, 436, 327, 334, 569, 460, 1412, 17, 26, 460, 14443, 12404, 13, 4215, 7, 7675, 477, 460, 3868, 8, 507, 185, 655, 562, 334, 7675, 3018, 460, 7400, 15, 8, 507, 185, 1044, 967, 2515, 26, 185, 655, 611, 185, 436, 611, 185, 436, 967, 1917, 26, 185, 
315, 611, 185, 185, 315, 1270, 3314, 7248, 317, 769, 11142, 7, 569, 3240, 11, 1097, 29663, 8, 507, 185, 436, 1097, 8402, 25915, 250, 5959, 590, 405, 3240, 567, 207, 16, 26, 185, 436, 1470, 5036, 262, 5959, 590, 7, 17819, 370, 25915, 250, 5959, 590, 1435, 507, 185, 655, 8402, 25915, 250, 5959, 590, 374, 26, 185, 436, 611, 185, 185, 436, 1097, 19702, 43, 1139, 250, 5959, 590, 405, 29663, 4536, 16, 26, 185, 436, 1470, 5036, 262, 5959, 590, 7, 5612, 370, 43, 1139, 250, 5959, 590, 1435, 507, 185, 655, 19702, 43, 1139, 250, 5959, 590, 3868, 26, 185, 436, 611, 185, 185, 436, 967, 3240, 1938, 8402, 25915, 250, 5959, 590, 2956, 29663, 1013, 19702, 43, 1139, 250, 5959, 590, 26, 185, 315, 611, 185, 92, 185, 10252, 185, 185, 1488, 11767, 335, 25, 185, 185, 12, 428, 2220, 21359, 387, 20327, 63, 2040, 4486, 274, 3857, 372, 2773, 11, 21492, 519, 254, 4362, 14223, 11, 285, 7579, 254, 1228, 280, 17876, 254, 1019, 1594, 276, 254, 1813, 280, 254, 1856, 1594, 13, 185, 12, 428, 2220, 262, 5959, 590, 63, 2040, 14223, 562, 245, 1594, 317, 9972, 457, 7505, 1128, 473, 207, 17, 276, 254, 7748, 4330, 280, 254, 1594, 285, 13608, 562, 359, 317, 2987, 21240, 457, 683, 280, 1454, 5744, 13, 185, 12, 428, 2220, 262, 769, 11142, 63, 2040, 13402, 254, 8402, 9972, 1594, 6612, 849, 254, 1019, 2773, 1594, 285, 254, 19702, 9972, 1594, 5565, 849, 254, 1856, 2773, 1594, 13, 804, 930, 14223, 562, 254, 2773, 5744, 417, 2372, 437, 3160, 13, 185, 12, 680, 254, 2220, 5609, 63, 2040, 11, 274, 2194, 2773, 3857, 317, 1219, 11, 285, 254, 2220, 21359, 387, 20327, 63], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 4888, 317, 254, 9840, 2031, 344, 18957, 254, 7271, 25, 185, 185, 10252, 4660, 185, 1892, 5075, 13, 2303, 13, 1966, 20011, 26, 185, 185, 3566, 757, 
20622, 20327, 507, 185, 185, 315, 1270, 3314, 2494, 1959, 7, 2005, 4807, 10936, 8, 507, 185, 436, 1097, 4807, 2773, 405, 507, 20, 11, 207, 17, 3871, 1069, 18531, 2773, 3857, 185, 436, 1675, 507, 185, 655, 1097, 1228, 405, 11489, 20327, 7, 3584, 477, 185, 655, 3639, 13, 406, 13, 12705, 1195, 4835, 25, 440, 945, 1228, 477, 185, 436, 611, 5765, 334, 26869, 10804, 28151, 300, 8, 507, 185, 655, 3639, 13, 406, 13, 12705, 7, 68, 13, 31426, 3705, 185, 436, 611, 185, 315, 611, 185, 185, 315, 1270, 3314, 1097, 11489, 20327, 7, 569, 4807, 2773, 8, 507, 185, 436, 562, 334, 3584, 2312, 2352, 1550, 2773, 13, 4082, 2069, 4084, 17, 8, 507, 185, 655, 5245, 756, 28625, 28151, 1195, 6546, 3857, 1526, 3768, 5082, 979, 5744, 2583, 185, 436, 611, 185, 185, 436, 1097, 3240, 405, 2773, 58, 15, 2818, 185, 436, 1097, 29663, 405, 2773, 58, 16, 2818, 185, 185, 436, 562, 5036, 262, 5959, 590, 7, 4907, 8, 1550, 2069, 262, 5959, 590, 7, 790, 4421, 1435, 507, 185, 655, 5245, 756, 28625, 28151, 1195, 6546, 3857, 1526, 3768, 979, 4856, 9972, 5744, 2583, 185, 436, 611, 185, 185, 436, 562, 5036, 262, 769, 11142, 7, 4907, 11, 29663, 1435, 507, 185, 655, 5245, 756, 28625, 28151, 1195, 6546, 5744, 417, 441, 2372, 254, 7122, 3160, 2583, 185, 436, 611, 185, 185, 436, 967, 334, 569, 8, 12404, 13, 17257, 7, 4907, 11, 29663, 477, 185, 315, 611, 185, 185, 315, 1270, 3314, 7248, 317, 5959, 590, 7, 569, 1594, 8, 507, 185, 436, 562, 334, 7675, 22322, 16, 8, 507, 185, 655, 967, 2515, 26, 185, 436, 611, 185, 436, 327, 334, 569, 460, 1412, 17, 26, 460, 14443, 12404, 13, 4215, 7, 7675, 477, 460, 3868, 8, 507, 185, 655, 562, 334, 7675, 3018, 460, 7400, 15, 8, 507, 185, 1044, 967, 2515, 26, 185, 655, 611, 185, 436, 611, 185, 436, 967, 1917, 26, 185, 315, 611, 185, 185, 315, 1270, 3314, 7248, 317, 769, 11142, 7, 569, 3240, 11, 1097, 29663, 8, 507, 185, 436, 1097, 8402, 25915, 250, 5959, 590, 405, 3240, 567, 207, 16, 26, 185, 436, 1470, 5036, 262, 5959, 590, 7, 17819, 370, 25915, 250, 5959, 590, 1435, 507, 185, 655, 
8402, 25915, 250, 5959, 590, 374, 26, 185, 436, 611, 185, 185, 436, 1097, 19702, 43, 1139, 250, 5959, 590, 405, 29663, 4536, 16, 26, 185, 436, 1470, 5036, 262, 5959, 590, 7, 5612, 370, 43, 1139, 250, 5959, 590, 1435, 507, 185, 655, 19702, 43, 1139, 250, 5959, 590, 3868, 26, 185, 436, 611, 185, 185, 436, 967, 3240, 1938, 8402, 25915, 250, 5959, 590, 2956, 29663, 1013, 19702, 43, 1139, 250, 5959, 590, 26, 185, 315, 611, 185, 92, 185, 10252, 185, 185, 1488, 11767, 335, 25, 185, 185, 12, 428, 2220, 21359, 387, 20327, 63, 2040, 4486, 274, 3857, 372, 2773, 11, 21492, 519, 254, 4362, 14223, 11, 285, 7579, 254, 1228, 280, 17876, 254, 1019, 1594, 276, 254, 1813, 280, 254, 1856, 1594, 13, 185, 12, 428, 2220, 262, 5959, 590, 63, 2040, 14223, 562, 245, 1594, 317, 9972, 457, 7505, 1128, 473, 207, 17, 276, 254, 7748, 4330, 280, 254, 1594, 285, 13608, 562, 359, 317, 2987, 21240, 457, 683, 280, 1454, 5744, 13, 185, 12, 428, 2220, 262, 769, 11142, 63, 2040, 13402, 254, 8402, 9972, 1594, 6612, 849, 254, 1019, 2773, 1594, 285, 254, 19702, 9972, 1594, 5565, 849, 254, 1856, 2773, 1594, 13, 804, 930, 14223, 562, 254, 2773, 5744, 417, 2372, 437, 3160, 13, 185, 12, 680, 254, 2220, 5609, 63, 2040, 11, 274, 2194, 2773, 3857, 317, 1219, 11, 285, 254, 2220, 21359, 387, 20327, 63].\n",
            "Sample 57773 of the training set: <｜begin▁of▁sentence｜>You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n",
            "### Instruction:\n",
            "Create a program in Java that takes an array with two positive prime numbers as input and prints the result of raising the first number to the power of the second number. The program should handle cases where the input array does not contain two positive prime numbers and display an error message in such cases.\n",
            "\n",
            "Additionally, the program should check if the input numbers are within a specified range. The range should be defined as the largest prime number smaller than the first input number and the smallest prime number larger than the second input number. If the input numbers are not within this range, the program should display an error message.\n",
            "\n",
            "For example, if the input array is [5, 2], the program should calculate 5 raised to the power of 2, which is 25. However, if the input array is [7, 3], the program should display an error message since 7 is not within the range of prime numbers (2, 3, 5, 7).\n",
            "\n",
            "To check if a number is prime, you should implement a separate method that returns true if the number is prime and false otherwise. This method should use a more efficient algorithm than simply checking divisibility by all numbers less than it.\n",
            "\n",
            "Furthermore, the program should use exception handling to handle any possible exceptions, such as if the input array is null or if the input numbers are not positive. In such cases, an appropriate error message should be displayed.\n",
            "\n",
            "Note: You can assume that the input numbers will always be integers.\n",
            "### Response:\n",
            "Here is the Java program that meets the requirements:\n",
            "\n",
            "```java\n",
            "import java.util.Arrays;\n",
            "\n",
            "public class PrimePower {\n",
            "\n",
            "    public static void main(String[] args) {\n",
            "        int[] input = {5, 2}; // Example input array\n",
            "        try {\n",
            "            int result = calculatePower(input);\n",
            "            System.out.println(\"Result: \" + result);\n",
            "        } catch (IllegalArgumentException e) {\n",
            "            System.out.println(e.getMessage());\n",
            "        }\n",
            "    }\n",
            "\n",
            "    public static int calculatePower(int[] input) {\n",
            "        if (input == null || input.length != 2) {\n",
            "            throw new IllegalArgumentException(\"Input array must contain exactly two numbers\");\n",
            "        }\n",
            "\n",
            "        int base = input[0];\n",
            "        int exponent = input[1];\n",
            "\n",
            "        if (!isPrime(base) || !isPrime(exponent)) {\n",
            "            throw new IllegalArgumentException(\"Input array must contain two positive prime numbers\");\n",
            "        }\n",
            "\n",
            "        if (!isInRange(base, exponent)) {\n",
            "            throw new IllegalArgumentException(\"Input numbers are not within the specified range\");\n",
            "        }\n",
            "\n",
            "        return (int) Math.pow(base, exponent);\n",
            "    }\n",
            "\n",
            "    public static boolean isPrime(int number) {\n",
            "        if (number <= 1) {\n",
            "            return false;\n",
            "        }\n",
            "        for (int i = 2; i <= Math.sqrt(number); i++) {\n",
            "            if (number % i == 0) {\n",
            "                return false;\n",
            "            }\n",
            "        }\n",
            "        return true;\n",
            "    }\n",
            "\n",
            "    public static boolean isInRange(int base, int exponent) {\n",
            "        int largestSmallerPrime = base - 1;\n",
            "        while (!isPrime(largestSmallerPrime)) {\n",
            "            largestSmallerPrime--;\n",
            "        }\n",
            "\n",
            "        int smallestLargerPrime = exponent + 1;\n",
            "        while (!isPrime(smallestLargerPrime)) {\n",
            "            smallestLargerPrime++;\n",
            "        }\n",
            "\n",
            "        return base > largestSmallerPrime && exponent < smallestLargerPrime;\n",
            "    }\n",
            "}\n",
            "```\n",
            "\n",
            "Explanation:\n",
            "\n",
            "- The `calculatePower` method takes an array as input, performs all the necessary checks, and returns the result of raising the first number to the power of the second number.\n",
            "- The `isPrime` method checks if a number is prime by iterating from 2 to the square root of the number and checking if it is divisible by any of those numbers.\n",
            "- The `isInRange` method finds the largest prime number smaller than the first input number and the smallest prime number larger than the second input number. It then checks if the input numbers are within this range.\n",
            "- In the `main` method, an example input array is used, and the `calculatePower`.\n",
            "Sample 1315 of the training set: [32013, 2042, 417, 274, 20926, 14244, 20391, 11, 26696, 254, 20676, 8041, 74, 339, 8589, 2008, 11, 6908, 457, 20676, 8041, 74, 7958, 11, 285, 340, 885, 3495, 4301, 4512, 276, 4531, 8214, 13, 1487, 4636, 2223, 13143, 4301, 11, 5411, 285, 13936, 4447, 11, 285, 746, 2159, 12, 13517, 250, 8214, 4301, 11, 340, 540, 20857, 276, 3495, 13, 185, 13518, 3649, 3475, 25, 185, 9913, 254, 1884, 11599, 2974, 276, 3417, 254, 2017, 1189, 13, 18567, 11, 5450, 344, 254, 4489, 317, 245, 4856, 10878, 1433, 207, 16, 23, 285, 207, 19, 15, 285, 344, 254, 1208, 317, 245, 2649, 365, 429, 2743, 207, 18, 7445, 13, 428, 761, 13954, 1020, 330, 2236, 849, 506, 7, 77, 650, 1064, 291, 317, 254, 3212, 1594, 280, 4728, 279, 254, 11599, 2974, 13, 185, 185, 90, 185, 207, 440, 16137, 2828, 207, 185, 207, 821, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 12040, 950, 185, 730, 440, 490, 2828, 207, 18, 15, 185, 315, 4637, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 41, 2145, 950, 185, 730, 440, 490, 2828, 207, 17, 20, 185, 315, 611, 185, 207, 6337, 185, 92, 185, 13518, 21289, 25, 185, 90, 185, 207, 440, 16137, 2828, 207, 185, 207, 821, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 12040, 950, 185, 730, 440, 490, 2828, 207, 18, 15, 185, 315, 4637, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 41, 2145, 950, 185, 730, 440, 490, 2828, 207, 17, 20, 185, 315, 4637, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 21708, 950, 185, 730, 440, 490, 2828, 207, 18, 20, 185, 315, 611, 185, 207, 6337, 185, 92, 185, 32021], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 90, 185, 207, 440, 16137, 2828, 207, 185, 207, 821, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 12040, 950, 185, 730, 440, 490, 2828, 207, 18, 15, 185, 315, 4637, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 41, 2145, 950, 185, 730, 440, 490, 2828, 207, 17, 20, 185, 315, 4637, 185, 315, 507, 207, 185, 730, 440, 1523, 2828, 440, 21708, 950, 185, 730, 440, 490, 2828, 207, 18, 20, 185, 315, 611, 185, 207, 6337, 185, 92, 185, 32021].\n",
            "Sample 1315 of the training set: <｜begin▁of▁sentence｜>You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n",
            "### Instruction:\n",
            "Edit the following JSON code to include the given data. Additionally, ensure that the age is a positive integer between 18 and 40 and that the name is a string with at least 3 characters. The time complexity should be less than O(n), where n is the total number of users in the JSON code.\n",
            "\n",
            "{\n",
            "  \"users\": \n",
            "  [\n",
            "    { \n",
            "      \"name\": \"John\",\n",
            "      \"age\": 30\n",
            "    },\n",
            "    { \n",
            "      \"name\": \"Jane\",\n",
            "      \"age\": 25\n",
            "    }\n",
            "  ]\n",
            "}\n",
            "### Response:\n",
            "{\n",
            "  \"users\": \n",
            "  [\n",
            "    { \n",
            "      \"name\": \"John\",\n",
            "      \"age\": 30\n",
            "    },\n",
            "    { \n",
            "      \"name\": \"Jane\",\n",
            "      \"age\": 25\n",
            "    },\n",
            "    { \n",
            "      \"name\": \"Alex\",\n",
            "      \"age\": 35\n",
            "    }\n",
            "  ]\n",
            "}\n",
            "<|EOT|>.\n",
            "Sample 67912 of the training set: [32013, 2042, 417, 274, 20926, 14244, 20391, 11, 26696, 254, 20676, 8041, 74, 339, 8589, 2008, 11, 6908, 457, 20676, 8041, 74, 7958, 11, 285, 340, 885, 3495, 4301, 4512, 276, 4531, 8214, 13, 1487, 4636, 2223, 13143, 4301, 11, 5411, 285, 13936, 4447, 11, 285, 746, 2159, 12, 13517, 250, 8214, 4301, 11, 340, 540, 20857, 276, 3495, 13, 185, 13518, 3649, 3475, 25, 185, 7256, 245, 9840, 2031, 276, 1272, 279, 274, 10878, 473, 254, 2664, 285, 2816, 254, 1594, 4678, 11259, 13, 185, 13518, 21289, 25, 185, 1892, 5075, 13, 2303, 13, 25674, 1337, 26, 185, 185, 3566, 757, 11988, 35, 264, 9787, 507, 185, 315, 1270, 3314, 2494, 1959, 7, 2005, 4807, 10936, 8, 507, 185, 436, 22142, 1337, 10310, 1337, 405, 756, 22142, 1337, 7, 5240, 13, 246, 477, 185, 185, 436, 3639, 13, 406, 13, 4128, 1195, 14651, 274, 10878, 25, 24435, 185, 436, 1097, 1594, 405, 10310, 1337, 13, 4786, 3373, 1293, 185, 185, 436, 3639, 13, 406, 13, 12705, 1195, 546, 4678, 11259, 1594, 317, 25, 440, 945, 334, 7675, 572, 207, 17, 3183, 185, 315, 611, 185, 92, 185, 32021], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1892, 5075, 13, 2303, 13, 25674, 1337, 26, 185, 185, 3566, 757, 11988, 35, 264, 9787, 507, 185, 315, 1270, 3314, 2494, 1959, 7, 2005, 4807, 10936, 8, 507, 185, 436, 22142, 1337, 10310, 1337, 405, 756, 22142, 1337, 7, 5240, 13, 246, 477, 185, 185, 436, 3639, 13, 406, 13, 4128, 1195, 14651, 274, 10878, 25, 24435, 185, 436, 1097, 1594, 405, 10310, 1337, 13, 4786, 3373, 1293, 
185, 185, 436, 3639, 13, 406, 13, 12705, 1195, 546, 4678, 11259, 1594, 317, 25, 440, 945, 334, 7675, 572, 207, 17, 3183, 185, 315, 611, 185, 92, 185, 32021].\n",
            "Sample 67912 of the training set: <｜begin▁of▁sentence｜>You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n",
            "### Instruction:\n",
            "Create a Java program to read in an integer from the user and output the number doubled.\n",
            "### Response:\n",
            "import java.util.Scanner;\n",
            "\n",
            "public class NumberDoubling {\n",
            "    public static void main(String[] args) {\n",
            "        Scanner scanner = new Scanner(System.in);\n",
            "\n",
            "        System.out.print(\"Enter an integer: \");\n",
            "        int number = scanner.nextInt();\n",
            "\n",
            "        System.out.println(\"The doubled number is: \" + (number * 2));\n",
            "    }\n",
            "}\n",
            "<|EOT|>.\n",
            "Installed CUDA version 12.2 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n",
            "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n",
            "Detected CUDA files, patching ldflags\n",
            "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/cpu_adam/build.ninja...\n",
            "Building extension module cpu_adam...\n",
            "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
            "ninja: no work to do.\n",
            "Loading extension module cpu_adam...\n",
            "Time to load cpu_adam op: 2.440640687942505 seconds\n",
            "Parameter Offload: Total persistent parameters: 266240 in 65 params\n",
            "[2024-03-07 03:24:45,066] [INFO] [launch.py:316:sigkill_handler] Killing subprocess 18386\n",
            "[2024-03-07 03:24:45,066] [ERROR] [launch.py:322:sigkill_handler] ['/usr/bin/python3', '-u', '/content/DeepSeek-Coder/finetune/finetune_deepseekcoder.py', '--local_rank=0', '--model_name_or_path', '/content/deepseek-coder-6.7b-instruct', '--data_path', '/content/drive/MyDrive/EvolInstruct-Code-80k.json', '--output_dir', '/content/drive/MyDrive/output', '--num_train_epochs', '1', '--model_max_length', '1024', '--per_device_train_batch_size', '1', '--per_device_eval_batch_size', '1', '--gradient_accumulation_steps', '2', '--evaluation_strategy', 'no', '--save_strategy', 'epoch', '--save_steps', '100', '--save_total_limit', '2', '--learning_rate', '1e-5', '--warmup_steps', '10', '--logging_steps', '100', '--lr_scheduler_type', 'cosine', '--gradient_checkpointing', 'True', '--report_to', 'tensorboard', '--deepspeed', '/content/DeepSeek-Coder/finetune/configs/ds_config_zero3.json', '--fp16', 'True'] exits with return code = -9\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "nKoia9jut3rU"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}